########################################################
## Replication of Main Document
########################################################
rm(list=ls())
start_time <- Sys.time()
########################################################
## Package loading
########################################################
library(BridgeChange)
library(devtools)  
library(coda)
library(plm)
library(pforeach)
library(dplyr)
library(colorspace)
library(monomvn)
library(glmnet)
library(MASS)
library(MCMCpack)
library(pcse)
library(pastecs)
library(xtable)
library(bayesplot)
library(dotwhisker)
library(broom)
library(stableGR)
library(tidyverse)
library(lmtest)  #
source("helper.R")

########################################################################
## Application 2: Allee and Scalera 2012 IO
########################################################################
load("AS2012 IO Imp Data.RData")
## time index fix!
x$year <- ifelse(is.na(x$year), 2009, x$year)

##########################
## AS2012 Models
##########################
formula1 <- lnFtrade ~ lnpop1 + gled_gdppc + totalcont + polity + domestic1_9 + member
formula2 <- lnFtrade ~ lnpop1 + gled_gdppc + totalcont + polity + domestic1_9 + memce + rigorous
formula3 <- lnFtrade ~ lnpop1 + gled_gdppc + totalcont + polity + domestic1_9 + memre + colonial
formula4 <- lnFtrade ~ lnpop1 + gled_gdppc + totalcont + polity + domestic1_9 + memrc + earlymem
formula5 <- lnFtrade ~ lnpop1 + gled_gdppc + totalcont + polity + domestic1_9 + rigorous
formula6 <- lnFtrade ~ lnpop1 + gled_gdppc + totalcont + polity + domestic1_9 + rigorous +
    rigorouscounter + colonial + colonialcounter + earlymem + earlymemcounter
formula52 <- lnFtrade ~ lnpop1 + gled_gdppc + totalcont + polity + domestic1_9 + rigorous +
    colonial + earlymem
formula52.inter <- lnFtrade ~ (lnpop1 + gled_gdppc + totalcont + polity + domestic1_9 + rigorous +
                                   colonial + earlymem)^2
formula6.inter <- lnFtrade ~ (lnpop1 + gled_gdppc + totalcont + polity + domestic1_9 + rigorous +
    rigorouscounter + colonial + colonialcounter + earlymem + earlymemcounter)^2

####################################
## rigorous
####################################
data=subset(x, imp == 1 ) ## & year > 1955)
mat <- data.frame(apply(table(data$rigorous, data$year), 1, cumsum))
colnames(mat) <- c("all", "yes")
mat <- tibble(year = 1946:2009,
              all = mat$all,
              yes = mat$yes)
mat <- mat %>% mutate(prop = yes/all)
## ggplot(mat, aes(year, prop)) + geom_point() + geom_line()

## clustered error
## G <- length(unique(p.df$uncode))
## N <- length(p.df$uncode)
## dfa <- (G/(G - 1)) * (N - 1)/pm1$df.residual
## display with cluster VCE and df-adjustment
## c_vcov <- dfa * vcovHC(pm1, type = "HC0", cluster = "group", adjust = T)
## coeftest(pm1, vcov = c_vcov)

## HMBB estimation
## It takes a long time. Try a short mcmc run for the test
set.seed(11133)
mcmc = 1000; burn = 1000; verbose = 1000; thin = 1;
AS <- subset(x, imp == 1)

as.cp0 <- BridgeFixedPanelPool(formula.p=formula52, data.p = AS, standardize.p = TRUE, 
                                index.p = c('uncode', 'year'), model.p = 'within',  
                                effect.p = 'twoway', alpha.MH = TRUE,
                                Waic.p = TRUE, marginal.p = FALSE,
                                mcmc.p = mcmc, burn.p = burn,
                                thin.p = thin, 
                                verbose.p=verbose, break.upper = 2,
                               interaction.p = 1)

as.cp1 <- BridgeFixedPanelPool(formula.p=formula6, data.p = AS, standardize.p = TRUE, 
                                index.p = c('uncode', 'year'), model.p = 'within',  
                                effect.p = 'twoway', alpha.MH = TRUE,
                                Waic.p = TRUE, marginal.p = FALSE,
                                mcmc.p = mcmc, burn.p = burn,
                                thin.p = thin,
                                verbose.p=verbose, break.upper = 2,
                                interaction.p = 1)



w0 <- attr(as.cp0, "Waic")
w1 <- attr(as.cp1, "Waic")

#########################
## Table 2
#########################
tab2 <- xtable(matrix(unlist(rbind(w0, w1)[,1]), 2, 3, byrow=TRUE), digit=0)
print(tab2, file="Table2.tex")


#########################
## Figure 7
#########################
## Figure 7a: state transitions
pdf(file="Figure7a.pdf", family="sans", height = 3, width = 12)
par(mfrow=c(1,4))
plotState(as.cp0[[2]], main="Column 5 (break 1)", start = 1946)
plotState(as.cp0[[3]], main="Column 5 (break 2)", start = 1946)
plotState(as.cp1[[2]], main="Column 6 (break 1)", start = 1946)
plotState(as.cp1[[3]], main="Column 6 (break 2)", start = 1946)
## plotState(as.cp3[[2]], main="Column 6 (Full interaction)", start = 1946)
dev.off()


## Figure 7b: m1 vs m2
pdf(file="Figure7b.pdf", width=13, height = 7, family="sans")
par(mfrow=c(1, 2))
set.seed(99)
dotplotRegime(as.cp1[[2]], hybrid=FALSE, start = 1946, text.cex=0.8, x.location="random",
              main="Column 6 (break 1)")
dotplotRegime(as.cp1[[3]], hybrid=FALSE, start = 1946, text.cex=0.8, x.location="random",
              main="Column 6 (break 2)")
dev.off()




#########################
## Figure 8: compare estimates
#########################
AS.scale <- AS
AS.scale$lnFtrade <- scale(AS$lnFtrade)
AS.scale$lnpop1 <- scale(AS$lnpop1)
AS.scale$gled_gdppc<- scale(AS$gled_gdppc)
AS.scale$totalcont <- scale(AS$totalcont)
AS.scale$polity <- scale(AS$polity)
AS.scale$domestic1_9 <- scale(AS$domestic1_9)
AS.scale$rigorous <- scale(AS$rigorous)
AS.scale$colonial <- scale(AS$colonial)
AS.scale$earlymem <- scale(AS$earlymem)


p.df.scaled <- pdata.frame(AS.scale, index = c("uncode", "year"), drop.index = FALSE) 
plm.fit <- plm(formula6, data = p.df.scaled, model="within", effect="twoways")
data = AS.scale 
G <- length(unique(data$uncode))
N <- length(data$uncode)
dfa <- (G/(G - 1)) * (N - 1)/plm.fit$df.residual
## display with cluster VCE and df-adjustment
c_vcov <- dfa * vcovHC(plm.fit, type = "HC0", cluster = "group", adjust = TRUE)
coef.mat <- coeftest(plm.fit, vcov = c_vcov)
var.names <- rownames(coef.mat)
## rownames(coef.mat) <- gsub("X", "", var.names)

## 2. bayesian outputs
output <- as.cp1[[3]]
coef.out <- summary(output)[[1]][grep("beta", rownames(summary(output)[[1]])), ]
coef.hpd <- summary(output)[[2]][grep("beta", rownames(summary(output)[[2]])), ]

coef.regime1 <- as.data.frame(coef.out[grep("regime1", rownames(coef.out)), ])
coef.hpd1 <- as.data.frame(coef.hpd[grep("regime1", rownames(coef.hpd)), ])
rownames(coef.regime1) <- rownames(coef.mat)

coef.regime2 <- as.data.frame(coef.out[grep("regime2", rownames(coef.out)), ])
coef.hpd2 <- as.data.frame(coef.hpd[grep("regime2", rownames(coef.hpd)), ])
rownames(coef.regime2) <- rownames(coef.mat)

coef.regime3 <- as.data.frame(coef.out[grep("regime3", rownames(coef.out)), ])
coef.hpd3 <- as.data.frame(coef.hpd[grep("regime3", rownames(coef.hpd)), ])
rownames(coef.regime3) <- rownames(coef.mat)

## 3. compare results
m1_df <- tibble(var = rownames(coef.mat),
                estimate = coef.mat[,1],
                upper = coef.mat[,1] + 1.96*coef.mat[,2],
                lower = coef.mat[,1] - 1.96*coef.mat[,2],) %>%
    mutate(model = "Fixed-effects (pooled)")
m2_df <- tibble(var = rownames(coef.regime1),
                estimate = coef.regime1$Mean,
                upper = coef.hpd1$`97.5%`,
                lower = coef.hpd1$`2.5%`)%>%
    mutate(model = "HMBB Regime 1")
m3_df <- tibble(var = rownames(coef.regime2),
                estimate = coef.regime2$Mean,
                upper = coef.hpd2$`97.5%`,
                lower = coef.hpd2$`2.5%`)%>%
    mutate(model = "HMBB Regime 2")

m4_df <- tibble(var = rownames(coef.regime3),
                estimate = coef.regime3$Mean,
                upper = coef.hpd3$`97.5%`,
                lower = coef.hpd3$`2.5%`)%>%
    mutate(model = "HMBB Regime 3")
three_models <- rbind(m1_df, m2_df, m3_df, m4_df)

pdf(file="Figure8.pdf", width=8, height=6, family="sans")
ggplot(three_models, aes(x=var, y = estimate, col=model, shape=model)) +        # ggplot2 plot with confidence intervals
  geom_point(size = 2, alpha=0.6, position=position_dodge(width=0.5)) +
  geom_errorbar(
        aes(ymin = lower, ymax = upper),
        width = 0.1,
        position=position_dodge(width=0.5)) +
    geom_hline(aes(yintercept = 0), linetype = "dashed") +
    theme_bw()+
    theme(legend.position="bottom") + 
    coord_flip() + 
    labs(title="Fixed-effects and HMBB Estimates", x="Covariates", y="Estimate", col="Model", shape="Model")
dev.off()

    
end_time <- Sys.time()
print(end_time - start_time)
